## data manipulation
library(reshape2)
library(plyr)
library(data.table)
library(stringr)

## math
library(Matrix)
library(MASS)
library(sandwich)

## output
library(ggplot2)
library(xtable)



##########
## prep ##
##########

red <- '#A31F34'
blue <- '#325585'

load('data/sqf_fryer_2003_2013.Rdata')
d <- d2; rm(d2)

## fryer (nd), empirical analysis of racial differences in police use of force
## figure 1 caption (use of force types)
##   0: when the police report using no force in a stop and frisk interaction
##   1: using at least hands
##   2: at least pushing a civilian to a wall
##   3: at least using handcuffs
##   4: at least drawing a weapon on a civilian
##   5: at least pushing a civilian to the ground
##   6: at least pointing a weapon at a civilian
##   7: at least using a pepper spray or a baton on a civilian
force.levels <- c(
  'At least\nuse of hands',
  'At least\npush to wall',
  'At least\nuse of handcuffs',
  'At least\ndraw weapon',
  'At least\npush to ground',
  'At least\npoint weapon',
  'Use baton or\npepper spray'
)

## set white as base level
races.abbr <- c('white', 'black', 'hisp', 'asian', 'other')
races <- c('white', 'black', 'hispanic', 'asian', 'other')
d$race <- factor(d$race2,
                 levels = races.abbr,
                 labels = races
                 )

## subset to variables used in fryer's simplest models
##   (regress on racial dummies, cluster on precinct)
d <- as.data.table(d)
d <- d[, c('race', 'force', 'pct')]
d <- na.omit(d)




#######################################################################
## plot corrected odds ratios, vs fryer's results with data dropping ##
#######################################################################

results.fryer <- read.csv(
  'data/FryerNDAppTable3Coefs.csv',
  stringsAsFactors = FALSE
)
results.fryer$type <- 'fryer'
## fryer multiplies outcome by 100 for use of force levels 4+
results.fryer$est <-
  results.fryer$est /
  ifelse(results.fryer$thresh <= 3, 1, 100)
results.fryer$se <-
  results.fryer$se /
  ifelse(results.fryer$thresh <= 3, 1, 100)

n.fryer <- c(4927962,
             4152918,
             4017783,
             3957687,
             3950324,
             3918741,
             3900977
             )
n.replication <- ldply(
  1:7,  # violence thresholds, e.g. 1 = 'at least use of hands'
  function(thresh){
    data.frame(force.level = gsub('\n', ' ', force.levels[thresh]),
               fryer = n.fryer[thresh],
               replicated = sum(d$force == 0 | d$force >= thresh),
               correct = nrow(d)
               )
  })
print(
  xtable(n.replication),
  include.rownames = FALSE
)

results.replication <- ldply(
  1:7,  # violence thresholds, e.g. 1 = 'at least use of hands'
  function(thresh){
    cat(thresh)
    fryer.ind <- which(d$force == 0 | d$force >= thresh)
    mod <- lm(force >= thresh ~ race,
              d[fryer.ind,]
              )
    vcovCL <- vcovCL(mod,
                     cluster = d$pct[fryer.ind]
                     )
    return(data.frame(thresh = thresh,
                      thresh.label = force.levels[thresh],
                      type = 'replication',
                      coef = names(coef(mod)),
                      est = coef(mod),
                      se = sqrt(diag(vcovCL))
                      )
           )
  })

results.correct <- ldply(
  1:7,  # violence thresholds, e.g. 1 = 'at least use of hands'
  function(thresh){
    mod <- lm(force >= thresh ~ race, d)
    vcovCL <- vcovCL(mod,
                     cluster = d$pct
                     )
    return(data.frame(thresh = thresh,
                      thresh.label = force.levels[thresh],
                      type = 'corrected',
                      coef = names(coef(mod)),
                      est = coef(mod),
                      se = sqrt(diag(vcovCL))
                      )
           )
  })

## merge results
results <- rbind.fill(results.fryer, results.replication, results.correct)

## labels for plots
results$thresh.label <- factor(
  force.levels[results$thresh],
  levels = force.levels,
  ordered = TRUE
)

results$coef <- gsub('^race', '', results$coef)
results$coef <- str_to_title(results$coef)
results <- results[results$coef %in% c('Black', 'Hispanic'),]
results$type <- factor(results$type,
                       levels = c('fryer',
                                  'replication',
                                  'corrected'
                                  ),
                       ordered = TRUE
                       )
results$x <- as.numeric(results$type)

## make plot

ggplot(results[results$coef == 'Black',],
       aes(x = x, # update feb17
           y = 1000 * est,
           ymin = 1000 * est + 1000 * qnorm(.025) * se,
           ymax = 1000 * est + 1000 * qnorm(.975) * se,
           color = type,
           shape = type
           )
       ) +
  ## invisible point to space y-axis correctly
  geom_point(data = data.frame(thresh.label = 'At least\nuse of hands',
                               x = 1, # update feb17
                               est = .1,
                               se = 0,
                               type = 'fryer'
                               ),
             col = NA
             ) +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  geom_point(position = position_dodge(width = .2), size = 2) +
  geom_errorbar(width = .1, position = position_dodge(width = .2), size = .75) +
  facet_wrap('thresh.label', scales = 'free', nrow = 1) +
  ggtitle('Black civilians, versus white') +
  ylab('Racial difference in use of force\n(per thousand encounters)\n') +
  xlab(NULL) +
  scale_color_manual(
    values = c(fryer = 'black',
               replication = blue,
               corrected = red
               ),
    labels = c('Fryer (2019)',
               'Replication',
               'Corrected coding'
               ),
    guide = guide_legend(override.aes = list(color = 'white'))
  ) +
  scale_shape_manual(
    values = c(fryer = 0,
               replication = 1,
               corrected = 2
               ),
    labels = c('Fryer (2019)',
               'Replication',
               'Corrected coding'
               )
  ) +
  theme_light(base_size = 22) +
  theme(legend.position = 'bottom',
        legend.direction = 'horizontal',
        legend.text.align = 0,
        axis.text.x = element_blank(),
        legend.key.size = unit(1, 'cm')
        ) +
  guides(color = guide_legend(title = NULL,
                              override.aes = list(color = 'white')
                              ),
         shape = guide_legend(title = NULL)
         ) +
  theme(
    legend.position = 'bottom',
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.key = element_rect(fill = "white")
  )
ggsave('pics/replication_black.pdf', width = 18, height = 6)


## make plot

ggplot(results[results$coef == 'Hispanic',],
       aes(x = x, # update feb17
           y = 1000 * est,
           ymin = 1000 * est + 1000 * qnorm(.025) * se,
           ymax = 1000 * est + 1000 * qnorm(.975) * se,
           color = type,
           shape = type
           )
       ) +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  geom_point(position = position_dodge(width = .2), size = 2) +
  geom_errorbar(width = .1, position = position_dodge(width = .2), size = .75) +
  facet_wrap('thresh.label', scales = 'free', nrow = 1) +
  ggtitle('Hispanic civilians, versus white') +
  ylab('Racial difference in use of force\n(per thousand encounters)\n') +
  xlab(NULL) +
  scale_color_manual(
    values = c(fryer = 'black',
               replication = blue,
               corrected = red
               ),
    labels = c('Fryer (2019)',
               'Replication',
               'Corrected coding'
               )
  ) +
  scale_shape_manual(
    values = c(fryer = 0,
               replication = 1,
               corrected = 2
               ),
    labels = c('Fryer (2019)',
               'Replication',
               'Corrected coding'
               )
  ) +
  theme_light(base_size = 22) +
  theme(legend.position = 'bottom',
        legend.direction = 'horizontal',
        legend.text.align = 0,
        axis.text.x = element_blank(),
        legend.key.size = unit(1, 'cm')
        ) +
  guides(color = guide_legend(title = NULL),
         shape = guide_legend(title = NULL)
         )
ggsave('pics/replication_hispanic.pdf', width = 18, height = 6)
